##########################################################
# John Henderson
# Replication Data for: "Hookworm Eradication as a Natural 
#   Experiment for Schooling and Voting in the American 
#   South", Forthcoming in Political Behavior, May 20, 2017
# 
##########################################################
#
#  figure_1.R
#  -- file produces the map in figure 1
#
##########################################################
 
rm(list=ls())
            
#library(nbpMatching)   
#library(Matching)
#library(rgenoud)
#library(AER)                           
#library(foreign) 
#library(penalized)

setwd('~/Dropbox/hookworm/replication')  
#source('lower.bound.R')
#source('funs/pscoreFun.R') 
#source('funs/mahalFun.R') 
#source('funs/mahalMatch.R') 
#source('funs/nbpGreedy.R')
#source('funs/balFun.R')
            
load('data/hookwormData.Rdata') 

##########################################################  
# data required 
#  - mMat :: matching matrix used in matching and balancing
#  - oMat :: outcome matrix of county education and turnout rates
#  - zMat :: intervention matrix 
#  - pMat :: placebo matrix
# hookSouth :: combines all objects into one dataset, w/ additional covariates  
##########################################################         

library(maps)
library(mapproj) 
library(stringr)

data(county.fips)
      
 
sts=str_sub(tolower(as.character(hookSouth$state)),1,str_length(tolower(as.character(hookSouth$state)))-3)             
sts[which(str_sub(sts,str_length(sts))==' ')]=str_sub(sts[which(str_sub(sts,str_length(sts))==' ')],1,str_length(sts[which(str_sub(sts,str_length(sts))==' ')])-1)
sts[which(sts=='tennesse')]='tennessee'
ccde=str_sub(paste('00',str_sub(hookSouth$cCode,1,str_length(hookSouth$cCode)-1),sep=''),str_length(paste('00',str_sub(hookSouth$cCode,1,str_length(hookSouth$cCode)-1),sep=''))-2)                
cnt_scode=as.numeric(paste(hookSouth$sCode,ccde,sep=''))

qn=as.character(county.fips[,2])
stt=paste(sts,tolower(as.character(hookSouth$county)),sep=',')               
sta=matrix(NA,length(stt),3)     
nas=array(0,length(stt))  

for(i in 1:length(sta[,1])){
	ix=which(stt[i]==qn)	 
	if(length(ix)==1){
		sta[i,1]=county.fips[ix,1]
		sta[i,2]=qn[ix]	
		sta[i,3]=ix
	} else if(length(ix)!=1){
		nas[i]=1        
		ix=which(c(adist(stt[i],qn))==min(c(adist(stt[i],qn))))		               
		
		if(length(ix)==1){
			sta[i,1]=county.fips[ix,1]
			sta[i,2]=qn[ix]	
			sta[i,3]=ix       
		}    			
	}
}        

sta[,1]=cnt_scode
           
# leave most/all NAs
               
sta=cbind(sta,NA)
#sta[which(hookData$z_hook==hookData_1$z_hook),4]=as.numeric(hookData$z_hook[which(hookData$z_hook==hookData_1$z_hook)])
sta[,4]=as.numeric(zMat$z_hook_rate)
sta=as.data.frame(sta)  

sta[,1]=as.numeric(as.character(sta[,1]))
sta[,2]=as.character(sta[,2])
sta[,3]=as.numeric(as.character(sta[,3]))
sta[,4]=as.numeric(as.character(sta[,4])) 
sta[,5]=zMat$z_hook_rate

#data(county.fips)

# define color buckets 
colors = colorRampPalette(c("cornflowerblue", "darkblue"))(10)
sta$colorBuckets <- as.numeric(cut(sta[,5], c(-.1, .1, .2, .3, .4, .5, .6, .7, .8,.9, 1.1)))
           
county.buckets=array(NA,length(county.fips))
for(i in 1:nrow(sta)){
	if(!is.na(sta[i,2])){
		ix=which(sta[i,2]==county.fips)
		county.buckets[ix]=sta$colorBuckets[i]				
	}   	
}
 

#sta[,2]
#county.fips

leg.txt <- c("<10%", "10-20%", "30-40%", "40-50%", "50-60%","60-70%","70-80%","80-90%","90-100%")
  # align data with map definitions by (partial) matching state,county
  # names, which include multiple polygons for some counties    
  
map.cnty=map("county", plot=FALSE)$names
colorsmatched=cnty.fips=array(NA,length(map.cnty))
for(i in 1:length(sta[,2])){
	if(!is.na(sta[i,2])){
		ix=which(sta[i,2]==map.cnty)
		if(length(ix)>0){
			colorsmatched[ix]=sta[i,6]
		}		
	}
}
	

GetColorHexAndDecimal <- function(color)
{
  c <- col2rgb(color)
  sprintf("#%02X%02X%02X %3d %3d %3d", c[1],c[2],c[3], c[1], c[2], c[3])
}           
    
GetColorHexAndDecimal("grey80")

#sort(unique(cls),decreasing=T)
cls=colors[colorsmatched]
leg.txt <- c("<10%", "10-20%", "30-40%", "40-50%", "50-60%","60-70%","70-80%","80-90%","90-100%")        
nms=map("county",col = colors[colorsmatched], plot=FALSE,fill = TRUE, resolution = 0,lty = 0, projection = "polyconic")$names

# impute odd missings that mismatched above 
un_nms=unique(str_sub(str_trim(tolower(hookSouth$state)),1))
#unique(str_sub(nms,1,str_locate(nms,',')[1]-1))
for(i in 1:length(un_nms)){
	if(any(!is.na(cls[grep(nms,pattern=un_nms[i])]))){
		#cls[grep(nms,pattern=un_nms[i])][which(is.na(cls[grep(nms,pattern=un_nms[i])]))]=names(sort(table(cls[grep(nms,pattern=un_nms[i])]),decreasing=T))[1]
	}
}
  
cls[which(str_sub(nms,1,4)=='iowa')]=NA
cls[which(str_sub(nms,1,4)=='west')]=NA  

cls[which(str_sub(nms,1,4)=='virg' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='virg' & !is.na(cls))]),decreasing=T))[1]
cls[which(str_sub(nms,1,4)=='alab' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='alab' & !is.na(cls))]),decreasing=T))[1]
cls[which(str_sub(nms,1,4)=='texa' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='texa' & !is.na(cls))]),decreasing=T))[1]
cls[which(str_sub(nms,1,4)=='loui' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='loui' & !is.na(cls))]),decreasing=T))[1]
                                         

pdf(file='~/Dropbox/hookworm/replication/figures/figure_1_hookmap_small.pdf')
map("county",col = cls, fill = TRUE, resolution = 0,
  lty = 0, projection = "polyconic")
map("state", col = "#CCCCCC99", fill = FALSE, add = TRUE, lty = 1, lwd = 0.2,
  projection="polyconic")
title("Hookworm Infection Rates, 1909 - 1910")
legend(x=-.179,y=.75, leg.txt, horiz = TRUE, fill = colors,cex=.37)
dev.off()

# grey scale

# define color buckets 
colors = colorRampPalette(c("grey75", "grey25"))(10)
sta$colorBuckets <- as.numeric(cut(sta[,5], c(-.1, .1, .2, .3, .4, .5, .6, .7, .8,.9, 1.1)))
           
county.buckets=array(NA,length(county.fips))
for(i in 1:nrow(sta)){
	if(!is.na(sta[i,2])){
		ix=which(sta[i,2]==county.fips)
		county.buckets[ix]=sta$colorBuckets[i]				
	}   	
}
 

#sta[,2]
#county.fips

leg.txt <- c("<10%", "10-20%", "30-40%", "40-50%", "50-60%","60-70%","70-80%","80-90%","90-100%")
  # align data with map definitions by (partial) matching state,county
  # names, which include multiple polygons for some counties    
  
map.cnty=map("county", plot=FALSE)$names
colorsmatched=cnty.fips=array(NA,length(map.cnty))
for(i in 1:length(sta[,2])){
	if(!is.na(sta[i,2])){
		ix=which(sta[i,2]==map.cnty)
		if(length(ix)>0){
			colorsmatched[ix]=sta[i,6]
		}		
	}
}
	

GetColorHexAndDecimal <- function(color)
{
  c <- col2rgb(color)
  sprintf("#%02X%02X%02X %3d %3d %3d", c[1],c[2],c[3], c[1], c[2], c[3])
}           
    
GetColorHexAndDecimal("grey80")

#sort(unique(cls),decreasing=T)
cls=colors[colorsmatched]
leg.txt <- c("<10%", "10-20%", "30-40%", "40-50%", "50-60%","60-70%","70-80%","80-90%","90-100%")        
nms=map("county",col = colors[colorsmatched], plot=FALSE,fill = TRUE, resolution = 0,lty = 0, projection = "polyconic")$names

# impute odd missings that mismatched above 
un_nms=unique(str_sub(str_trim(tolower(hookSouth$state)),1))
#unique(str_sub(nms,1,str_locate(nms,',')[1]-1))
for(i in 1:length(un_nms)){
	if(any(!is.na(cls[grep(nms,pattern=un_nms[i])]))){
		#cls[grep(nms,pattern=un_nms[i])][which(is.na(cls[grep(nms,pattern=un_nms[i])]))]=names(sort(table(cls[grep(nms,pattern=un_nms[i])]),decreasing=T))[1]
	}
}
  
cls[which(str_sub(nms,1,4)=='iowa')]=NA
cls[which(str_sub(nms,1,4)=='west')]=NA  

cls[which(str_sub(nms,1,4)=='virg' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='virg' & !is.na(cls))]),decreasing=T))[1]
cls[which(str_sub(nms,1,4)=='alab' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='alab' & !is.na(cls))]),decreasing=T))[1]
cls[which(str_sub(nms,1,4)=='texa' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='texa' & !is.na(cls))]),decreasing=T))[1]
cls[which(str_sub(nms,1,4)=='loui' & is.na(cls))]=names(sort(table(cls[which(str_sub(nms,1,4)=='loui' & !is.na(cls))]),decreasing=T))[1]
                                         
 


pdf(file='~/Dropbox/hookworm/replication/figures/figure_1_hookmap_small_bw.pdf')
map("county",col = cls, fill = TRUE, resolution = 0,
  lty = 0, projection = "polyconic")
map("state", col = "#CCCCCC99", fill = FALSE, add = TRUE, lty = 1, lwd = 0.2,
  projection="polyconic")
title("Hookworm Infection Rates, 1909 - 1910")
legend(x=-.179,y=.75, leg.txt, horiz = TRUE, fill = colors,cex=.37)
dev.off()

#END